0.1 Introduction

We downloaded the 779 samples (826 SRA files/runs) from ENA (Bioproject ID: PRJNA353588). The quality control of the fastq files were done with FastQC (Andrews, 2010) and fastp (Chen et al., 2018). fastp was also used to trim adapters and low-quality reads. After trimming, we indexed human GRCh38 cDNA/transcriptome with kallisto index and performed pseudoalignment to the with kallisto quant function (Bray et al., 2016).

library(rhdf5)
library(tidyverse)
library(tximport) 
library(ensembldb)
library(EnsDb.Hsapiens.v86)

#set file paths to your dataset
path <- file.path(paste0("../data/raw_data/aligned/", phenodataF$Run), "abundance.tsv") 

#get annotations using organism-specific package
Tx <- transcripts(EnsDb.Hsapiens.v86, 
                  columns=c("tx_id", "gene_name", "entrezid"))
Tx <- as_tibble(Tx)
#need to change first column name to 'target_id'
Tx <- dplyr::rename(Tx, target_id = tx_id)
#save detailed annotation as TxOG
TxOG <- Tx
TxOG <- TxOG %>% rename_with(~"gene", gene_name)
#transcript ID needs to be the first column in the dataframe
Tx <- dplyr::select(Tx, "target_id", "gene_name")

#import all kallisto files with counts
Txi_gene <- tximport(path, 
                     type = "kallisto", 
                     tx2gene = Tx, 
                     txOut = FALSE, 
                     countsFromAbundance = "lengthScaledTPM",
                     ignoreTxVersion = TRUE)

0.2 Data Exploration

0.2.1 Heteroscedasticity

# Load packages -----
suppressPackageStartupMessages({
library(tidyverse) 
library(edgeR) 
library(matrixStats)
library(cowplot) 
library(RUVSeq)
library(plotly)
library(DESeq2)
library(DEFormats)
library(glmGamPoi)
library(swamp)
library(ggpubr)
library(MetBrewer)
})

# Examine your data up to this point ----
myTPM <- Txi_gene$abundance
myCounts <- Txi_gene$counts

colnames(myCounts) <- phenodataF$sampleLabels
colnames(myTPM) <- phenodataF$sampleLabels

# Generate summary stats for your data ----
# 1st, calculate summary stats for each transcript or gene, and add these to your data matrix
# then use the base R function 'transform' to modify the data matrix (equivalent of Excel's '=')
# then we use the 'rowSds', 'rowMeans' and 'rowMedians' functions from the matrixStats package
myTPM.stats <- transform(myTPM, 
                         SD=rowSds(myTPM), 
                         AVG=rowMeans(myTPM),
                         MED=rowMedians(myTPM))


TPMplot <- ggplot(myTPM.stats) + 
  aes(x = SD, y = MED) +
  geom_point(shape=16, size=2) +
  geom_smooth(method=lm) +
  geom_hex(show.legend = T) +
  labs(y="Median", x = "Standard deviation",
       title="Transcripts per million (TPM)",
       subtitle="unfiltered, non-normalized data",
       caption="") +
  theme_cowplot()
TPMplot

Highly expressed genes have higher variation, while lowly expressed genes have smaller variation.

0.2.2 Filtering and normalization

#Collapse technical replicates

#create Deseq dataset ----
ddsRaw <- DESeqDataSetFromTximport(Txi_gene, 
                                colData=phenodataF, 
                                design= ~class)
colnames(ddsRaw@assays@data@listData[["counts"]]) <- ddsRaw@colData@listData[["sampleLabels"]]

#collapse technical replicates
ddsColl <- collapseReplicates(ddsRaw, ddsRaw$geo_accession, ddsRaw$Run)

phenodataColl <- data.frame(ddsColl@colData@listData)
phenodataColl$Run <- phenodataColl$runsCollapsed
phenodataColl <- phenodataColl[1:23]

#remove the one sample from PISA hospital
rmix <- which(rownames(colData(ddsColl)) == "GSM2391029")
ddsColl <- ddsColl[,-rmix]
ddsColl$Hospital <- droplevels(ddsColl$Hospital)

colnames(ddsColl@assays@data@listData[["counts"]]) <-
  ddsColl@colData@listData[["sampleLabels"]]

#assign collapsed counts
myCountsColl <- ddsColl@assays@data@listData[["counts"]]
colnames(myCountsColl) <- ddsColl@colData@listData[["sampleLabels"]]

#Filtering low counts! ----
y <- as.DGEList(ddsColl)
keep <- filterByExpr(y) #design ~class 

y <- y[keep,] #20752 14545
ddsFilt <- ddsColl[keep,]

#plot the count distributions before and after filtering
inputCounts <-cpm(ddsColl@assays@data@listData[["counts"]], log = T)
dd <- reshape2::melt(inputCounts, variable.name = "sample")

densR <- ggplot(dd, aes(value, colour = Var2)) + geom_density() +
  theme_cowplot() +
  xlab("Log-cpm")+
  ggtitle("Raw Data")+
  theme(legend.position="none") + 
  xlim(min(dd$value), max(dd$value))

inputCounts <-cpm(ddsFilt@assays@data@listData[["counts"]], log = T)
dd <- reshape2::melt(inputCounts, variable.name = "sample")

densFilt <- ggplot(dd, aes(value, colour = Var2)) + geom_density() +
  theme_cowplot() +
  xlab("Log-cpm")+
  ggtitle("Filtered Data")+
  theme(legend.position="none") + 
  xlim(min(dd$value), max(dd$value))

ggarrange(densR, densFilt)

We first collapsed technical replicates (collapseReplicates from DESeq2) and removed the one sample from PISA hospital (GSM2391029). We filtered lowly expressed genes by using filterByExpr from edgeR (min.count = 10, design = ~class, min. group size = 376). This reduced the number of genes from 35297 to 14545

# Make a DGElist from your counts ----

##make the dge list for rawdata (before collapsing) ----
myDGEListRaw = as.DGEList(ddsRaw)
myDGEListColl <- as.DGEList(ddsColl)
myDGEListFilt <- as.DGEList(ddsFilt)

# The names argument tells the barplot to use the sample names on the x-axis
# The las argument rotates the axis names
LibSizeDF <- data.frame(Sample = myDGEListColl$samples$Run, 
                        Libsize = myDGEListColl$samples$lib.size,
                        Class = myDGEListColl$samples$group, 
                        Hospital =myDGEListColl$samples$Hospital,
                        Patient = myDGEListColl$samples$Patient_group,
                        StorageT = myDGEListColl$samples$submission_date)  %>%
  arrange(Libsize)  # Order by LibSize


#LibSizeDF$Sample <- factor(LibSizeDF$Sample, levels = LibSizeDF$Sample)
libsizeplot <- ggplot(LibSizeDF, aes(x = Sample, y = Libsize, fill = Patient)) +
  geom_bar(stat = "identity") +
  labs(x = "Sample", y = "Library Size", title = "Library Size per Sample")+
  theme(axis.text.x = element_blank())+
  scale_fill_met_d("Signac")
libsizeplot

Library sizes of 778 samples are shown in the figure. Colors are for each group.

#variance-stabilizing transformation ----
vsd <- vst(ddsFilt, blind=F)
pc <- prcomp(t(assay(vsd)))

#calculate percentages
pc.var<-pc$sdev^2 # sdev^2 captures these eigenvalues from the PCA result
pc.per<-round(pc.var/sum(pc.var)*100, 1) # we can then use these eigenvalues to calculate the percentage variance explained by each PC

pca.res.df <- as_tibble(pc$x)
class = vsd$class

###find outliers ----
idx <- pc$x[,1] < -190 | pc$x[,2] > 100

myplot <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = class, shape = idx, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  labs(title="PCA plot of filtered data",
       caption=paste0("produced on ", Sys.time()))+
  coord_fixed()

ggplotly(myplot)

We performed variance-stabilizing transformation (vst from DESeq2) to explore the data with principal component analysis (PCA). We removed two outliers we observed in PCA plots (GSM2390915 and GSM2627443).

###remove outliers and redo vst ----
class <- vsd$class
class <- class[!idx]
ddsFiltF <- ddsFilt[,!idx]
colData(ddsFiltF) <- droplevels(colData(ddsFiltF))

vsd <- vst(ddsFiltF, blind=F)
pc <- prcomp(t(assay(vsd)))

#calculate percentages
pc.var<-pc$sdev^2 # sdev^2 captures these eigenvalues from the PCA result
pc.per<-round(pc.var/sum(pc.var)*100, 1) # we can then use these eigenvalues to calculate the percentage variance explained by each PC

pca.res.df <- as_tibble(pc$x)

myplot1 <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = class, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  labs(title="PCA plot w/o outliers",
       caption=paste0("produced on ", Sys.time()))+
  coord_fixed()
  #scale_color_met_d("Veronese")

pca.res.df <- pc$x[,1:4] %>% 
  as_tibble() %>%
  add_column(sample = vsd$sampleLabels,
             group = class, 
             hospital = vsd$Hospital,
             W5 = vsd$W5,
             disease = vsd$Patient_group)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC4, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)

myplot2 <- ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=disease) + 
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_cowplot() +
  coord_flip()+
  theme(axis.text.y = element_blank())

plot_grid(
  myplot1, myplot2,
  labels = c('A', 'B'),
  align="hv",
  nrow = 2,
  ncol = 1
)

After removing outliers, we reprepare the PCA plot (Figure A). We can also see the first 4 PCs causing variation in the data (Figure B).

###estimate RUV factors + technical variation----
#https://github.com/mikelove/preNivolumabOnNivolumab/blob/main/preNivolumabOnNivolumab.knit.md

#First we see what happens when we run simple DESeq2 analysis comparing the two groups, 
#without attempting to control for the technical variation. Here we use glmGamPoi which 
#is an efficient method for estimating dispersion when we have many samples. This can 
#speed up the analysis by an order of magnitude. For details, see: https://doi.org/10.1093/bioinformatics/btaa1009

ddsIn <- ddsFiltF
ddsIn <- DESeq(ddsIn, test="LRT", reduced=~1, fitType="glmGamPoi")

res <- results(ddsIn)
table(res$padj < .05)
## 
## FALSE  TRUE 
##  7186  7359

There are thousands of differentially expressed (DE) genes, which, as we will demonstrate, stem from uncontrolled confounding factors. Ignoring technical variation is not suitable when it correlates with the condition.

set <- newSeqExpressionSet(counts(ddsIn))
set <- betweenLaneNormalization(set, which="upper")
not_sig <- rownames(res)[which(res$padj > .05 & abs(res$log2FoldChange) < .05)]
empirical <- rownames(set)[ rownames(set) %in% not_sig ]
set <- RUVg(set, empirical, k=5)

pdat <- pData(set)
pdat$class <- class

vsd$W1 <- pdat$W_1
vsd$W2 <- pdat$W_2
vsd$W3 <- pdat$W_3
vsd$W4 <- pdat$W_4
vsd$W5 <- pdat$W_5

#We can visualize how the factors of unwanted variation describe the samples 
#in the PC1 and PC2 space:

pc <- prcomp(t(assay(vsd)))

#calculate percentages
pc.var<-pc$sdev^2 # sdev^2 captures these eigenvalues from the PCA result
pc.per<-round(pc.var/sum(pc.var)*100, 1) # we can then use these eigenvalues to calculate the percentage variance explained by each PC
pca.res.df <- as_tibble(pc$x)

hospitalplot <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = vsd$Hospital, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  guides(color=guide_legend(title="hospital"))+
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  coord_fixed()+
  scale_color_brewer(palette = "Set1")  

p1 <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = vsd$W1, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  guides(color=guide_colourbar(title="UV1"))+
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  coord_fixed()+
  scale_color_met_c("VanGogh1")

p2 <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = vsd$W2, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  guides(color=guide_colourbar(title="UV2"))+
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  coord_fixed()+
  scale_color_met_c("VanGogh1")

p3 <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = vsd$W3, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  guides(color=guide_colourbar(title="UV3"))+
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  coord_fixed()+
  scale_color_met_c("VanGogh1")

p4 <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = vsd$W4, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  guides(color=guide_colorbar(title="UV4"))+
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  coord_fixed()+
  scale_color_met_c("VanGogh1")


p5 <- ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, color = vsd$W5, text = vsd$Run)+
  geom_point()+
  theme_bw() +
  guides(color=guide_colorbar(title="UV5"))+
  #geom_label(nudge_y = 0, nudge_x = 0) +
  #stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")"))+
  ylab(paste0("PC2 (",pc.per[2],"%",")"))+ 
  coord_fixed()+
  scale_color_met_c("VanGogh1")



figure <- ggarrange(p1,p2,p3,p4,p5,hospitalplot,
          #labels = c("UV1", "UV2", "UV3", "UV4", "UV5"),
          #font.label = list(size = 10, face = "bold"),
          ncol = 2, nrow = 3, vjust = 1.5, hjust = -1)



figureAnnot <- annotate_figure(figure,
                top = text_grob("Unwanted Variation in Filtered Data", face = "bold", size = 12),
                bottom = text_grob("(vst, outlier removed)",
                                   hjust = 1, x = 1, size = 10))
figureAnnot

Next, we checked for unwanted variation (UV) in the data using RUVg from RUVSeq. For this, we generated a list of stable (empirical) genes. We fit a Gamma-Poisson generalized linear model to the filtered data and ran a likelihood ratio test (LRT) to find genes that do not have changes in their expression across different levels of the experimental conditions (~class). We then defined the empirical genes as the genes with log2FC between -0.05 and 0.05 and with adjusted p-values larger than 0.05. We ended up with 1491 genes as empirical genes and using this list we performed RUV with k=5.


0.3 Differential Gene Expression (DGE) Analysis

0.3.1 DGE Analysis with DESeq2

library("apeglm")
library(DESeq2)
library(gt)
library(biomaRt)
library(org.Hs.eg.db)
library(tibble)
library(dplyr)

#add variation factors to design and do DGE analysis----
colData(ddsIn) <- cbind(colData(ddsIn), pdat[,1:5])
design(ddsIn) <- ~W_1 + W_2 + W_3 + W_4 + W_5 + class

We included the 5 UV factors in the design matrix alongside the condition of interest (~class: non-cancer vs. NSCLC) and performed differential expression (DE) analys is with DESeq from DESeq2.

#check dispersion to figure out the fit type----
#following https://support.bioconductor.org/p/81094/

DDS <- estimateSizeFactors(ddsIn)
par <- estimateDispersions(DDS, fitType = "parametric")
loc <- estimateDispersions(DDS, fitType = "local")
glmgam <- estimateDispersions(DDS, fitType = "glmGamPoi")

par(mfrow =c(1,3))
plotDispEsts(glmgam, main= "dispEst: glmGamPoi")
plotDispEsts(par, main= "dispEst: parametric")
plotDispEsts(loc, main= "dispEst: local")


#check residuals to decide on fit type
residualg <- log(mcols(glmgam)$dispGeneEst) - log(mcols(glmgam)$dispFit)
residualp <- log(mcols(par)$dispGeneEst) - log(mcols(par)$dispFit)
residuall <- log(mcols(loc)$dispGeneEst) - log(mcols(loc)$dispFit)

residualsT <- tibble("fit type" = c("glmGamPoi", "parametric", "local"),
          "median absolute residual" =c(median(abs(residualg), na.rm = T),
                                      median(abs(residualp), na.rm = T),
                                      median(abs(residuall), na.rm = T)))

residualsT %>%
  gt() %>%
  fmt_number(columns=2, decimals = 3) %>%
  tab_header(title = md("**Dispersion Estimates**"),
             subtitle = md("**to choose fit type***")) %>%
  tab_source_note(
    source_note = md("*the type of fitting of dispersions to the mean intensity."))

We tried three diffent methods to estimate dispersions and chose the one with lowest log10-residual.

#run with selected fit type----

dds <- DESeq(ddsIn, test="LRT", reduced=~W_1 + W_2 + W_3 + W_4 + W_5,
             fitType="glmGamPoi")

resD<- results(dds, name ="classNSCLC")
plotMA(resD,ylim=c(-2,2))

#Get results in tibbles ----
resD_tb <- resD %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

##get descriptions for the genes----
annotations <- read.csv("../data/processed_data/resD_tb_annotations.csv", row.names = 1)

##get differentially expressed genes----
padj.cutoff <- 0.05
lfc.cutoff <- 0.58

resD_sig <- resD_tb %>% dplyr::filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)
resD_sig0 <- resD_tb %>% dplyr::filter(padj < padj.cutoff)

orderedRes <- resD_tb %>% arrange(padj)
#write.csv(orderedRes, file="../data/processed_data/Deseq2Results.csv")
normCountsD <- counts(dds, normalized = TRUE)

##histograms ----
hist(resD$log2FoldChange, 
     col = "white",
     breaks = 100,
     #breaks=0:50/50, 
     xlab="log2FC", main="Histogram of log2FCs")
abline(v = c(-0.5, 0.5), col = "#973d21", lwd = 1.5, lty = 'dashed')

hist(resD$pvalue, 
     col = "white",
     breaks=0:50/50,
     xlab="p value", main="Histogram of p values")
abline(v = c(0.05, 0.01), col = c("#e78429","#ab3329"), lwd = 1.5, lty = 'dashed')
text(x=0.025, y=6200, '0.01', col="#ab3329", srt = 270, cex = 0.8)
text(x=0.065, y=6200, '0.05', col="#e78429", srt = 270, cex = 0.8)

Genes that have log2FC values smaller than -0.58 and larger than 0.58 adjusted p-values smaller than 0.05 were classified as differentially expressed genes.

library(EnhancedVolcano)

top.x = resD_tb
top.x.b <- top.x[abs(top.x$log2FoldChange) > 0.58,]
top.x.b <- top.x.b[order(top.x.b$padj),]
GeneSymbol <- top.x$gene
GeneSymbol2 <- top.x.b$gene

print(EnhancedVolcano(top.x,
                      lab = GeneSymbol,
                      selectLab = GeneSymbol2[1:30],
                      col = c("grey30",  "#7db0ea", "#89ab7c", "#a00e00"),
                      x = 'log2FoldChange',
                      y = 'padj',
                      xlab = bquote(~Log[2]~ 'fold change'),
                      pCutoff = 0.05,
                      FCcutoff = 0.58,
                      cutoffLineType = 'twodash',
                      cutoffLineWidth = 0.8,
                      pointSize = 2.0,
                      labSize = 4.0,
                      colAlpha = 0.3,
                      axisLabSize = 12,
                      legendLabels=c('Not sig.','Not sig. Log2FC','adj.p.value',
                                     'adj.p.value & Log2FC'),
                      legendPosition = 'right',
                      legendLabSize = 12,
                      legendIconSize = 4.0,
                      drawConnectors = TRUE,
                      widthConnectors = 0.5,
                      maxoverlapsConnectors = 50,
                      boxedLabels = T,
                      xlim = c(-2, 2),
                      ylim = c(0, -log10(min(top.x$padj))*1.1),
                      title = "NSCLC vs. nonCancer"))

####make count plots----
dds$Patient_group <- gsub("_", " ", dds$Patient_group)
dds$Patient_group <- factor(dds$Patient_group)


#for multiple #for multiple #for multiple genes

gene_set <- topnD$gene[1:6]
names(gene_set) <- gene_set

for (i in 1:length(gene_set)) {
  gene <- gene_set[i]
  
  d <- plotCounts(dds, gene=gene, intgroup="class", 
                  returnData=TRUE)
  
  mean_team <- d %>% group_by(class) %>% 
    summarise(mean_pts=mean(count))
  plot <- ggplot(d, aes(x=class, 
                        y=count, 
                        colour = class)) + 
    geom_point(position=position_jitter(w=0.1,h=0)) +
    scale_y_log10(breaks=c(25,100,400,1000))+
    theme_cowplot() +
    ggtitle(gene)+
    theme(legend.position = "none", axis.text.x = element_blank(), 
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          strip.background = element_rect(fill="white"),
          strip.placement = "outside",
          strip.clip = "off",
          strip.text = element_text(size = 12, angle = 90, hjust = 1)) +
    facet_grid(~class, 
               shrink = F, scales = "free", space = "free", switch = "x") +
    geom_hline(aes(yintercept = mean_pts, color = class), 
               mean_team, linewidth = 1)
  
  assign(paste0("plot", i), plot)
  
}
classplot <- ggarrange(plot1, plot2, plot3, plot4, plot5, plot6)

for (i in 1:length(gene_set)) {
  gene <- gene_set[i]
  
  d <- plotCounts(dds, gene=gene, intgroup="Patient_group", 
                  returnData=TRUE)
  
  mean_team <- d %>% group_by(Patient_group) %>% 
    summarise(mean_pts=mean(count))
  plot <- ggplot(d, aes(x=Patient_group, 
                        y=count, 
                        colour = Patient_group)) + 
    geom_point(position=position_jitter(w=0.1,h=0)) +
    scale_y_log10(breaks=c(25,100,400))+
    theme_cowplot() +
    ggtitle(gene)+
    theme(legend.position = "none", axis.text.x = element_blank(), 
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          strip.background = element_rect(fill="white"),
          strip.placement = "outside",
          strip.clip = "off",
          strip.text = element_text(size = 12, angle = 90, hjust = 1)) +
    facet_grid(~Patient_group, labeller = label_wrap_gen(width=10), 
               shrink = F, scales = "free", space = "free", switch = "x") +
    geom_hline(aes(yintercept = mean_pts, color = Patient_group), 
               mean_team, linewidth = 1)
  
  assign(paste0("plot", i), plot)
  
}
patientplot <- ggarrange(plot1, plot2, plot3, plot4, plot5, plot6)
ggarrange(classplot, patientplot, nrow =2, ncol = 1)

0.3.2 DGE Analysis with Limma

library(edgeR)
library(gt)
library(DEFormats)
library(Glimma)
library(DT)

myDGEListIn <- as.DGEList(ddsIn)
myDGEListIn.norm <- calcNormFactors(myDGEListIn, method = "TMM")
myDGEListIn.norm$samples <- droplevels(myDGEListIn.norm$samples)

group <- myDGEListIn.norm$samples$group
W_1 <- myDGEListIn.norm$samples$W_1
W_2 <- myDGEListIn.norm$samples$W_2
W_3 <- myDGEListIn.norm$samples$W_3
W_4 <- myDGEListIn.norm$samples$W_4
W_5 <- myDGEListIn.norm$samples$W_5

design <- model.matrix(~0 + group + W_1 + W_2 + W_3 + W_4 + W_5)
colnames(design)[1:2] <- c("noncancer", "NSCLC")

v.myDGEListIn.norm <- voom(myDGEListIn.norm, design, plot = TRUE)

#fit linear model
fit <- lmFit(v.myDGEListIn.norm, design)

# Contrast matrix ----
contrast.matrix <- makeContrasts(NSCLC - noncancer, levels = design) #limma

# extract the linear model fit -----
fits <- contrasts.fit(fit, contrast.matrix)
#get bayesian stats for your linear model fit
ebFit <- eBayes(fits)

#make MAplot
limma::plotMA(fits, ylim=c(-2,2))
abline(h=0, col="red", lwd=2)

top.table <- topTable(ebFit, adjust ="BH", sort.by = "P", n = Inf)

resL_tb <- top.table %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

orderedResL <- resL_tb %>% arrange(adj.P.Val)
orderedResL <- merge(orderedResL, annotations, by='gene', all.x = T) %>% 
  distinct(., gene, .keep_all = T)
orderedResL <- merge(orderedResL, TxOG[6:8], by='gene', all.x = T) %>% 
  distinct(., gene, .keep_all = T)

orderedResL$description <- unlist(lapply(strsplit(as.character(orderedResL$description), "\\["), '[[', 1))
orderedResL <- orderedResL %>% dplyr::select(gene, description, target_id, entrezid, logFC, AveExpr, P.Value, adj.P.Val)
orderedResL$entrezid <-  unlist(sapply(orderedResL$entrezid,"[[",1))

#histograms ----
hist(resL_tb$logFC, 
     col = "white",
     breaks = 100,
     #breaks=0:50/50, 
     xlab="log2FC", main="Histogram of log2FCs")
abline(v = c(-0.5, 0.5), col = "#973d21", lwd = 1.5, lty = 'dashed')

hist(resL_tb$P.Value, 
     col = "white",
     breaks=0:50/50,
     xlab="p value", main="Histogram of p values")
abline(v = c(0.05, 0.01), col = c("#e78429","#ab3329"), lwd = 1.5, lty = 'dashed')
text(x=0.025, y=5800, '0.01', col="#ab3329", srt = 270, cex = 0.8)
text(x=0.065, y=5800, '0.05', col="#e78429", srt = 270, cex = 0.8)

##get differentially expressed genes----
results <- decideTests(ebFit, method="global", adjust.method="BH", p.value=0.05, lfc=0.58)
diffGenes <- v.myDGEListIn.norm$E[results[,1] !=0,]

#convert your DEGs to a dataframe using as_tibble
diffGenes.df <- as_tibble(diffGenes, rownames = "geneID")

#get sig dfs----
padj.cutoff <- 0.05
lfc.cutoff <- 0.58

resL_sig <- resL_tb %>%
  dplyr::filter(`adj.P.Val` < padj.cutoff & abs(logFC) > lfc.cutoff)

resL_sig0 <- resL_tb %>%
  dplyr::filter(`adj.P.Val` < padj.cutoff)

##make the top table----

#add descriptions
resL_sig2 <- merge(resL_sig, annotations, by='gene', all.x = T) %>% 
  distinct(., gene, .keep_all = T)

resL_sig2$description <- unlist(lapply(strsplit(as.character(resL_sig2$description), "\\["), '[[', 1))

# create interactive tables to display your DEGs ----

resL_sig2 <- resL_sig2 %>% dplyr::select(gene, description, AveExpr, logFC, P.Value, adj.P.Val)
datatable(resL_sig2,
          extensions = c('KeyTable', "FixedHeader"),
          caption = 'Table 1: DEGs in NSCLC',
          options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
  formatRound(columns=c(3:6), digits=2)
#compare DESeq and Limma----
library(ggVennDiagram)

v0 <- ggVennDiagram(list(topnD$gene, topnL$gene),
              category.names = c("DESeq2", "Limma voom"),
              label_alpha = 0,
              set_size = 4,
              set_color = "#5F5F5F",
              edge_size = 0)+
  scale_fill_gradient(low = "#f6f2ee", high =  "#E4B09A")+
  #scale_x_continuous(expand = expansion(mult = .3))+
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, size = 12, colour = "#5F5F5F", face = "bold"),
        plot.subtitle  = element_text(hjust = 0.5, colour = "#5F5F5F"))+
  labs(title = "Top 20 genes",
       subtitle = "assessed by two methods")+
  coord_flip()

v1 <- ggVennDiagram(list(resD_sig$gene, resL_sig$gene),
              category.names = c("DESeq2", "Limma voom"),
              label_alpha = 0,
              set_size = 4,
              set_color = "#5F5F5F",
              edge_size = 0) +
  scale_fill_gradient(low = "#f6f2ee", high =  "#87bcbd")+
  scale_x_continuous(expand = expansion(mult = .3))+
  #coord_flip()+
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, size = 12, colour = "#4E696A", face = "bold"),
        plot.caption = element_text(hjust = 0.5, colour = "#5F5F5F"))+
  labs(title = "Differentially expressed genes",
       caption = "|log2FC| > 0.58 & adj.p-value < 0.05")


v2 <- ggVennDiagram(list(resD_sig0$gene, resL_sig0$gene),
              category.names = c("DESeq2", "Limma voom"),
              label_alpha = 0,
              set_size = 4,
              set_color = "#5F5F5F",
              edge_size = 0) +
  scale_fill_gradient(low = "#f6f2ef", high =  "#7c4b73")+
  scale_x_continuous(expand = expansion(mult = .3))+
  #coord_flip()+
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, size = 12, colour = "#52334C", face = "bold"),
        plot.caption = element_text(hjust = 0.5, colour = "#5F5F5F"))+
  labs(title = "Significantly expressed genes",
       caption = "adj.p-value < 0.05")

ggarrange(v1,v2)

library(EnhancedVolcano)

top.x = resL_tb
top.x.b <- top.x[abs(top.x$logFC) > 0.58,]
top.x.b <- top.x.b[order(top.x.b$adj.P.Val),]
GeneSymbol <- top.x$gene
GeneSymbol2 <- top.x.b$gene

print(EnhancedVolcano(top.x,
                      lab = GeneSymbol,
                      selectLab = GeneSymbol2[1:30],
                      col = c("grey30",  "#7db0ea", "#89ab7c", "#a00e00"),
                      x = 'logFC',
                      y = 'adj.P.Val',
                      xlab = bquote(~Log[2]~ 'fold change'),
                      pCutoff = 0.05,
                      FCcutoff = 0.58,
                      cutoffLineType = 'twodash',
                      cutoffLineWidth = 0.8,
                      pointSize = 2.0,
                      labSize = 4.0,
                      colAlpha = 0.5,
                      axisLabSize = 12,
                      legendLabels=c('Not sig.','Not sig. Log2FC','adj.p.value',
                                     'adj.p.value & Log2FC'),
                      legendPosition = 'right',
                      legendLabSize = 12,
                      legendIconSize = 4.0,
                      drawConnectors = TRUE,
                      widthConnectors = 0.5,
                      maxoverlapsConnectors = 50,
                      boxedLabels = T,
                      xlim = c(-2, 2),
                      ylim = c(0, -log10(min(top.x$adj.P.Val))*1.1),
                      title = "NSCLC vs. nonCancer"
))


0.4 Clustering DEGs

# Description -----------
#this script creates heatmaps from differentially expressed genes or transcripts
#and selects modules of co-expressed genes based on pearson correlations

# Load packages -----
library(tidyverse)
library(limma) #we only use limma in this script for the 'avearrays' function
library(RColorBrewer) #need colors to make heatmaps
library(gplots) #the heatmap2 function in this package is a primary tool for making heatmaps
library(pheatmap)
library(cowplot)
library(plyr)

# Choose Colors ----
myheatcolors <- colorRampPalette(colors=c("blue","white","red"))(100)

# Prepare Data----
vIn <- v.myDGEListIn.norm
results <- decideTests(ebFit, method="global", adjust.method="BH", p.value=0.05, lfc=0.58)
disease <- myDGEListIn$samples$Patient_group
disease <- revalue(disease, c("Chronic_Pancreatitis" = "Chronic Pancreatitis",
                              "Epilepsy" = "Epilepsy",
                              "Healthy_Control" = "Healthy",
                              "Multiple_Sclerosis" = "MS",
                              "Non-significant_atherosclerosis" = "NS Atherosclerosis",
                              "NSCLC" = "NSCLC",
                              "Pulmonary_Hypertension" = "Pulmonary Hypertension",
                              "Stable_Angina_Pectoris" = "Stable AP",
                              "Unstable_Angina_Pectoris" =  "Unstable AP"))

colnames(vIn$E) <- disease
AllGenesHeatmap <- vIn$E
diffGenesHeatmap <- vIn$E[results[,1] !=0,]

#now an old function from the limma package to average your replicates 
AllGenes.AVG <- avearrays(AllGenesHeatmap)
AllGenes.AVG <- AllGenes.AVG[,order(colnames(AllGenes.AVG))]

write.csv(AllGenes.AVG, file="../data/processed_data/LimmaCountsAveraged.csv")

diffGenes.AVG <- avearrays(diffGenesHeatmap)
diffGenes.AVG <- diffGenes.AVG[,order(colnames(diffGenes.AVG))]

pheatmap(mat = AllGenes.AVG, 
         color = myheatcolors,,
         fontsize_col = 11,
         angle_col = 45,
         cellwidth = 30,
         cellheight = 0.02,
         scale="row",
         border_color = NA, 
         show_colnames = TRUE, 
         show_rownames = F, 
         #drop_levels = TRUE, 
         fontsize = 10, 
         cluster_cols = T,
         cluster_rows = T,
         clustering_distance_cols=as.dist(1-cor(AllGenes.AVG, method="spearman")),
         cex=1,
         main = "")

#plot_grid(clustered_data, modsize, HM, ncol = 2, nrow = 2)
clustered_data

modsize

m1

#plot_grid(m1,m2,m3,m4,m5,m6,m7,m8,m9, labels = "AUTO", nrow = 3, ncol = 3)
# Load packages ----
library(tidyverse)
library(limma)
library(DT) 
library(GSEABase) 
library(Biobase) 
library(GSVA) 
library(gprofiler2) 
library(clusterProfiler) 
library(msigdbr) 
library(enrichplot) 

logScale <- function(input, input_start = 1, input_end = 50000, 
                     output_start = 2, output_end = 10) {
  m = (output_end - output_start)/(log(input_end) - log(input_start))
  b = -m * log(input_start) + output_start
  output = m * log(input) + b
  return(output)
}

# Carry out GO enrichment using gProfiler2 ----
# use topTable result to pick the top genes for carrying out a Gene Ontology (GO) enrichment analysis
myTopHitsUp <- resL_sig2[resL_sig2$logFC > 0,]
myTopHitsDown <- resL_sig2[resL_sig2$logFC < 0,]

# use the 'gost' function from the gprofiler2 package to run GO enrichment analysis

datalist = list()

for (i in 1:9) {
  df <- get(paste0("myModule", i))
  gost.res <- gost(rownames(df), 
                   organism = "hsapiens", 
                   correction_method = "fdr",
                   sources = c("GO", "KEGG", "REAC"),
                   custom_bg =rownames(resL_tb))
  if (is.null(gost.res)) {
    next
  } 
  
  gostTable <- data.frame(apply(gost.res[["result"]],2,as.character))
  
  essential_names <- c("source_order", "term_size", "term_name", 
                       "term_id", "source", "significant", "p_value")
  gostTable <- gostTable %>% dplyr::select(essential_names)
  gostTable <- gostTable %>% mutate_at(c('source_order', 'term_size', 'p_value'), as.numeric)
  gostTable$term_size_scaled <- logScale(gostTable$term_size)
  gostTable$module <- as.character(i)
  gostTable <- gostTable[1:5,]
  datalist[[i]] <- gostTable
}

myGO.df = do.call(rbind, datalist)

myGO.df <- myGO.df %>%
  mutate(moduleColor = case_when(
    module == 1 ~ "#762A83",
    module == 2 ~ "#9970AB",
    module == 3 ~ "#C2A5CF",
    module == 4 ~ "#E7D4E8",
    module == 5 ~ "burlywood",
    module == 6 ~ "#D9F0D3",
    module == 7 ~ "#A6DBA0",
    module == 8 ~ "#5AAE61",
    module == 9 ~ "#1B7837"))

ggplot(myGO.df, aes(x = module, y = term_name)) +
  geom_point(aes(size = term_size_scaled,
                 fill = moduleColor, alpha = -log10(p_value)),
             shape = 21) +
  scale_fill_identity() +  
  scale_alpha(range = c(0.4, 1)) +
  theme_bw()


0.5 Gene Set Enrichment Analysis

library(clusterProfiler)
library(enrichplot)
library(ggplot2)
library(pathview)
library(ggridges)

# SET THE DESIRED ORGANISM HERE
organism = "org.Hs.eg.db"
#BiocManager::install(organism, character.only = TRUE)
library(organism, character.only = TRUE)

df <- resL_tb
df <- merge(df, TxOG, by='gene', all.x = T) %>% distinct(., gene, .keep_all = T)
df$entrezid <- unlist(sapply(df$entrezid,"[[",1))

# we want the log2 fold change 
original_gene_list <- df$logFC
# name the vector - 
names(original_gene_list) <- df$entrezid

# omit any NA values 
gene_list<-na.omit(original_gene_list)
removeNAnames <- names(gene_list) == "NA"
gene_list <- gene_list[!removeNAnames]

# sort the list in decreasing order (required for clusterProfiler)
gene_list = sort(gene_list, decreasing = TRUE)
kegg_organism = "hsa"

#After here, you can choose and run the analysis & visualizations according to your needs
#1 KEGG GSEA:####

kk2 <- gseKEGG(geneList     = gene_list,
               organism     = kegg_organism,
               minGSSize    = 10,
               maxGSSize    = 500,
               pvalueCutoff = 0.05,
               pAdjustMethod = "BH",
               keyType       = "ncbi-geneid"
)

pl.tab_kk = kk2@result

datatable(pl.tab_kk,
          extensions = c('KeyTable', "FixedHeader"),
          caption = 'Table 2: GSEA Resultss',
          options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
  formatRound(columns=c(4:8), digits=2)
write.table(pl.tab_kk, file = "../analysis/GSEA/KEGG/KEGG_CUSTOM_allLimma.txt", sep = "\t", quote = F, 
            row.names = F, col.names = T)

#1a. Visualizations : dotplot & ridgeplot

dplot <- dotplot(kk2, title = "KEGG GSEA NSCLC vs. Noncancer", font.size=12,
        showCategory = 10, split=".sign",) + facet_grid(.~.sign)

rplot <- ridgeplot(kk2) + labs(x = "enrichment distribution")

enr <- pairwise_termsim(kk2)
emplot <- emapplot(enr)

#1d.heatplots
p1 <- heatplot(kk2, showCategory=100)
p2 <- heatplot(kk2, foldChange=gene_list, showCategory=10)

dplot

#2. Gene Ontology GSEA####
library(clusterProfiler)
library(enrichplot)
library(ggplot2)
library(pathview)
library(ggridges)

# SET THE DESIRED ORGANISM HERE
organism = "org.Hs.eg.db"
#BiocManager::install(organism, character.only = TRUE)
library(organism, character.only = TRUE)

df <- resL_tb
# we want the log2 fold change 
original_gene_list <- df$logFC
names(original_gene_list) <- df$gene

# omit any NA values 
gene_list<-na.omit(original_gene_list)
# sort the list in decreasing order (required for clusterProfiler)
gene_list = sort(gene_list, decreasing = TRUE)

#analysis - change keytype accordingly, minGSsize is minimum number of genes forming a geneset
gse <- gseGO(geneList=gene_list, 
             ont ="MF", 
             keyType = "SYMBOL", 
             minGSSize = 10, 
             maxGSSize = 500, 
             pvalueCutoff = 0.05, 
             verbose = TRUE, 
             OrgDb = organism, 
             pAdjustMethod = "BH")

pl.tab = gse@result

datatable(pl.tab,
          extensions = c('KeyTable', "FixedHeader"),
          caption = 'Table 2: GSEA Resultss',
          options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
  formatRound(columns=c(4:8), digits=2)
write.table(pl.tab, file = "../analysis/GSEA/GO/GOBP_CUSTOM_MF.txt", sep = "\t", quote = F, 
            row.names = F, col.names = T)


#1a. Visualizations : dotplot & ridgeplot

dp2 <- dotplot(gse, showCategory = 10, title = "GO MF Overrepresentation", font.size=12,
        split=".sign") +
  facet_grid(.~.sign)

rp2 <- ridgeplot(gse) + labs(x = "enrichment distribution")

dp2


0.6 Session Info

sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 14.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggridges_0.5.6              pathview_1.42.0            
##  [3] enrichplot_1.22.0           msigdbr_7.5.1              
##  [5] clusterProfiler_4.10.1      gprofiler2_0.2.3           
##  [7] GSVA_1.50.5                 GSEABase_1.64.0            
##  [9] graph_1.80.0                annotate_1.80.0            
## [11] XML_3.99-0.17               plyr_1.8.9                 
## [13] pheatmap_1.0.12             RColorBrewer_1.1-3         
## [15] ggVennDiagram_1.5.2         DT_0.33                    
## [17] Glimma_2.12.0               EnhancedVolcano_1.20.0     
## [19] ggrepel_0.9.5               org.Hs.eg.db_3.18.0        
## [21] biomaRt_2.58.2              gt_0.11.0                  
## [23] apeglm_1.24.0               MetBrewer_0.2.0            
## [25] ggpubr_0.6.0                swamp_1.5.1                
## [27] MASS_7.3-60.0.1             gplots_3.1.3.1             
## [29] amap_0.8-19                 impute_1.76.0              
## [31] glmGamPoi_1.14.3            DEFormats_1.30.0           
## [33] DESeq2_1.42.1               plotly_4.10.4              
## [35] RUVSeq_1.36.0               EDASeq_2.36.0              
## [37] ShortRead_1.60.0            GenomicAlignments_1.38.2   
## [39] SummarizedExperiment_1.32.0 MatrixGenerics_1.14.0      
## [41] Rsamtools_2.18.0            Biostrings_2.70.3          
## [43] XVector_0.42.0              BiocParallel_1.36.0        
## [45] cowplot_1.1.3               matrixStats_1.3.0          
## [47] edgeR_4.0.16                limma_3.58.1               
## [49] EnsDb.Hsapiens.v86_2.99.0   ensembldb_2.26.1           
## [51] AnnotationFilter_1.26.0     GenomicFeatures_1.54.4     
## [53] AnnotationDbi_1.64.1        GenomicRanges_1.54.1       
## [55] GenomeInfoDb_1.38.8         IRanges_2.36.0             
## [57] S4Vectors_0.40.2            tximport_1.30.0            
## [59] rhdf5_2.46.1                lubridate_1.9.3            
## [61] forcats_1.0.0               stringr_1.5.1              
## [63] dplyr_1.1.4                 purrr_1.0.2                
## [65] tidyr_1.3.1                 tibble_3.2.1               
## [67] ggplot2_3.5.1               tidyverse_2.0.0            
## [69] readr_2.1.5                 GEOquery_2.70.0            
## [71] Biobase_2.62.0              BiocGenerics_0.48.1        
## [73] knitr_1.48                  tinytex_0.51               
## [75] rmarkdown_2.27             
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.4                    ProtGenerics_1.34.0        
##   [3] bitops_1.0-7                HDO.db_0.99.1              
##   [5] httr_1.4.7                  Rgraphviz_2.46.0           
##   [7] numDeriv_2016.8-1.1         tools_4.3.0                
##   [9] backports_1.5.0             utf8_1.2.4                 
##  [11] R6_2.5.1                    HDF5Array_1.30.1           
##  [13] lazyeval_0.2.2              mgcv_1.9-1                 
##  [15] rhdf5filters_1.14.1         withr_3.0.0                
##  [17] prettyunits_1.2.0           gridExtra_2.3              
##  [19] cli_3.6.3                   scatterpie_0.2.3           
##  [21] labeling_0.4.3              sass_0.4.9                 
##  [23] KEGGgraph_1.62.0            mvtnorm_1.2-5              
##  [25] yulab.utils_0.1.4           commonmark_1.9.1           
##  [27] gson_0.1.0                  DOSE_3.28.2                
##  [29] R.utils_2.12.3              bbmle_1.0.25.1             
##  [31] rstudioapi_0.16.0           RSQLite_2.3.7              
##  [33] gridGraphics_0.5-1          generics_0.1.3             
##  [35] BiocIO_1.12.0               hwriter_1.3.2.1            
##  [37] gtools_3.9.5                crosstalk_1.2.1            
##  [39] vroom_1.6.5                 car_3.1-2                  
##  [41] GO.db_3.18.0                Matrix_1.6-5               
##  [43] interp_1.1-6                fansi_1.0.6                
##  [45] abind_1.4-5                 R.methodsS3_1.8.2          
##  [47] lifecycle_1.0.4             yaml_2.3.9                 
##  [49] carData_3.0-5               qvalue_2.34.0              
##  [51] SparseArray_1.2.4           BiocFileCache_2.10.2       
##  [53] grid_4.3.0                  blob_1.2.4                 
##  [55] crayon_1.5.3                bdsmatrix_1.3-7            
##  [57] lattice_0.22-6              beachmat_2.18.1            
##  [59] KEGGREST_1.42.0             pillar_1.9.0               
##  [61] fgsea_1.28.0                rjson_0.2.21               
##  [63] codetools_0.2-20            fastmatch_1.1-4            
##  [65] glue_1.7.0                  ggfun_0.1.5                
##  [67] data.table_1.15.4           treeio_1.26.0              
##  [69] vctrs_0.6.5                 png_0.1-8                  
##  [71] gtable_0.3.5                emdbook_1.3.13             
##  [73] cachem_1.1.0                aroma.light_3.32.0         
##  [75] xfun_0.46                   S4Arrays_1.2.1             
##  [77] tidygraph_1.3.1             coda_0.19-4.1              
##  [79] SingleCellExperiment_1.24.0 statmod_1.5.0              
##  [81] nlme_3.1-165                ggtree_3.10.1              
##  [83] bit64_4.0.5                 progress_1.2.3             
##  [85] filelock_1.0.3              bslib_0.7.0                
##  [87] irlba_2.3.5.1               KernSmooth_2.23-24         
##  [89] colorspace_2.1-0            DBI_1.2.3                  
##  [91] tidyselect_1.2.1            bit_4.0.5                  
##  [93] compiler_4.3.0              curl_5.2.1                 
##  [95] xml2_1.3.6                  DelayedArray_0.28.0        
##  [97] shadowtext_0.1.3            rtracklayer_1.62.0         
##  [99] checkmate_2.3.1             scales_1.3.0               
## [101] caTools_1.18.2              hexbin_1.28.3              
## [103] rappdirs_0.3.3              digest_0.6.36              
## [105] htmltools_0.5.8.1           pkgconfig_2.0.3            
## [107] jpeg_0.1-10                 sparseMatrixStats_1.14.0   
## [109] highr_0.11                  dbplyr_2.5.0               
## [111] fastmap_1.2.0               rlang_1.1.4                
## [113] htmlwidgets_1.6.4           DelayedMatrixStats_1.24.0  
## [115] farver_2.1.2                jquerylib_0.1.4            
## [117] jsonlite_1.8.8              GOSemSim_2.28.1            
## [119] R.oo_1.26.0                 BiocSingular_1.18.0        
## [121] RCurl_1.98-1.16             magrittr_2.0.3             
## [123] ggplotify_0.1.2             GenomeInfoDbData_1.2.11    
## [125] patchwork_1.2.0             Rhdf5lib_1.24.2            
## [127] munsell_0.5.1               Rcpp_1.0.13                
## [129] ggnewscale_0.4.10           babelgene_22.9             
## [131] ape_5.8                     viridis_0.6.5              
## [133] stringi_1.8.4               ggraph_2.2.1               
## [135] zlibbioc_1.48.2             parallel_4.3.0             
## [137] deldir_2.0-4                graphlayouts_1.1.1         
## [139] splines_4.3.0               hms_1.1.3                  
## [141] locfit_1.5-9.10             igraph_2.0.3               
## [143] ggsignif_0.6.4              markdown_1.13              
## [145] reshape2_1.4.4              ScaledMatrix_1.10.0        
## [147] evaluate_0.24.0             latticeExtra_0.6-30        
## [149] tweenr_2.0.3                tzdb_0.4.0                 
## [151] polyclip_1.10-6             ggforce_0.4.2              
## [153] rsvd_1.0.5                  broom_1.0.6                
## [155] xtable_1.8-4                restfulr_0.0.15            
## [157] tidytree_0.4.6              rstatix_0.7.2              
## [159] viridisLite_0.4.2           aplot_0.2.3                
## [161] memoise_2.0.1               timechange_0.3.0